/*==============================================================================
SCATTER PLOT BANK VALUES WITH ANNOTATIONS
==============================================================================
Purpose:
This script generates scatter plots showing bank value components against the
uninsured share of domestic deposits, including annotations for specific banks.
It is used both in the main analysis and in extensions with alternative
specifications (configured through globals).

Input:
- $path_clean/analysis_ready_data.dta (Prepared bank-quarter panel dataset)
- Global variables: $fig_scatter_annotated_dec, $fig_scatter_annotated_feb (main analysis output filenames)
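- Global variable: $fig_scatter_annotated_ext (extension output filename)
- Global variable: $ext_suffix (when non-empty, the extension output filename is used)
- Global variable: $periods_to_plot (which periods to generate: dec2021 and/or feb2023)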

Output:
- $path_figures/${fig_scatter_annotated_dec}.pdf (Dec 2021 figure, if requested)
- $path_figures/${fig_scatter_annotated_feb}.pdf (Feb 2023 figure, if requested)
- $path_figures/${fig_scatter_annotated_ext}.pdf (Extension figure, if requested)

Paper Correspondence:
- Main Analysis: Figure 9 Panel A (Dec 2021) and Panel B (Feb 2023)
- Extensions: Figures A.5 - A.10 (Feb 2023 only)

Last updated: July 03, 2025
==============================================================================*/

display "--- Starting Scatter Plot Bank Values with Annotations ---"

*===============================================================================
* Step 1: Generate Scatter Plots with Annotations
*===============================================================================
* Purpose: For every requested period, draw the scatter plot and overlay a box
* and a name label on each of a fixed set of highlighted banks.

* RSSD identifiers of the banks that receive a box and a label
* First Republic Bank
local frb_rssdid 4114567
* Signature Bank
local sig_rssdid 2942690
* Silicon Valley Bank
local svb_rssdid 802866

* Comma-separated ID list (Signature, First Republic, SVB), used both as a
* numlist to loop over and as the argument list of inlist()
local highlight_rssdids "`sig_rssdid',`frb_rssdid',`svb_rssdid'"

* Period suffixes to plot, as supplied by the caller through a global
local periods_list ${periods_to_plot}

* Loop through the desired periods.
* For each recognised period suffix the loop
*   (1) derives a padded y-axis range from the large-bank sample,
*   (2) builds box outlines and name labels for the highlighted banks,
*   (3) draws the combined scatter plot, and
*   (4) exports the figure as a PDF.
foreach suffix in `periods_list' {

    * Set locals based on the current suffix
    if "`suffix'" == "dec2021" {
        loc title "Dec 2021"
        loc period_filter "dec2021"
        loc xvar "uninsuredsh_domdep"
    }
    else if "`suffix'" == "feb2023" {
        loc title "Feb 2023"
        loc period_filter "feb2023"
        loc xvar "uninsuredsh_domdep"
    }
    else {
        * Unknown suffix: report it and move on to the next requested period
        display as error "Unknown period suffix: `suffix'"
        continue
    }

    * Define the y-variables (bank value components for the current period)
    loc yvar_noDF "v_noDF" // Assets - Deposits
    loc yvar_0 "v_0"       // Assets - Deposits + Insured DF
    loc yvar_1 "v_1"       // Assets - Deposits + Insured DF + Uninsured DF

    loc lcolorstyle "black%50" // Line color for boxes

    display "Generating large banks values for `title'"

    *---------------------------------------------------------------------------
    * Y-axis range
    *---------------------------------------------------------------------------
    * Pool the min and max of the three value measures over the plotted sample
    * (current period, assets_bill of at least 100)
    qui sum `yvar_noDF' if period=="`period_filter'" & assets_bill>=100, meanonly
    local y_min = r(min)
    local y_max = r(max)

    qui sum `yvar_0' if period=="`period_filter'" & assets_bill>=100, meanonly
    local y_min = min(`y_min', r(min))
    local y_max = max(`y_max', r(max))

    qui sum `yvar_1' if period=="`period_filter'" & assets_bill>=100, meanonly
    local y_min = min(`y_min', r(min))
    local y_max = max(`y_max', r(max))

    * Add padding to y-axis range (10% of the range on each side)
    local y_range = `y_max' - `y_min'
    local y_padding = `y_range' * 0.1
    local y_axis_min = `y_min' - `y_padding'
    local y_axis_max = `y_max' + `y_padding'

    * The axis always covers at least the default range of -5 to 25
    local y_axis_min = min(`y_axis_min', -5)
    local y_axis_max = max(`y_axis_max', 25)

    * Round outwards to multiples of 5
    local y_axis_min = floor(`y_axis_min'/5) * 5
    local y_axis_max = ceil(`y_axis_max'/5) * 5

    * Choose the label increment from the width of the rounded range
    local y_axis_range = `y_axis_max' - `y_axis_min'
    local y_increment = 5
    if `y_axis_range' > 50 {
        local y_increment = 10
    }
    if `y_axis_range' > 100 {
        local y_increment = 20
    }

    * Snap the bounds outwards to multiples of the chosen increment so that the
    * labels land on both bounds and on zero. This is a no-op when the increment
    * is 5; without it a range such as -15 to 45 with increment 10 would be
    * labelled -15, -5, 5 and so on.
    local y_axis_min = floor(`y_axis_min'/`y_increment') * `y_increment'
    local y_axis_max = ceil(`y_axis_max'/`y_increment') * `y_increment'

    *---------------------------------------------------------------------------
    * Boxes and labels for the highlighted banks
    *---------------------------------------------------------------------------
    * Box parameters - make them responsive to the data range
    local box_width_half = 0.01  // Half width of box around the x-coordinate
    local box_height_buffer = max(0.8, `y_range' * 0.05)  // Dynamic buffer based on data range
    local min_label_separation = 0.05  // Minimum horizontal distance between labels
    local legend_left_boundary = 0.7   // Left boundary of legend area
    local legend_right_boundary = 1    // Right boundary of legend area
    local legend_top_boundary = `y_axis_max' * 0.9     // Dynamic legend boundaries
    local legend_bottom_boundary = `y_axis_max' * 0.6  // Dynamic legend boundaries

    * Accumulators for the plot commands and text options passed to twoway
    local box_lines ""
    local box_labels ""

    * Number of box-line plots actually added. The legend keys further down are
    * computed from this count, so they stay correct when a highlighted bank is
    * missing from the current period.
    local n_box_plots = 0

    * Positions of labels already placed, used for the overlap checks
    local label_x_positions ""
    local label_y_positions ""

    * Loop through each highlighted bank's RSSD ID to calculate box coordinates
    foreach rssdid of numlist `highlight_rssdids' {

        * Get bank name for labeling based on RSSD ID
        local bank_name = ""
        if `rssdid' == `svb_rssdid' local bank_name = "SVB"
        if `rssdid' == `sig_rssdid' local bank_name = "Signature"
        if `rssdid' == `frb_rssdid' local bank_name = "First Republic"

        * Get the x-coordinate (uninsured share) for the current bank and period
        qui sum `xvar' if rssdid==`rssdid' & period=="`period_filter'", meanonly
        if r(N) > 0 { // Ensure the bank-period exists in the data
            local x_coord = r(mean) // Store the uninsured share value

            * Get the y-coordinates for all three value measures for the current bank and period
            qui sum `yvar_noDF' if rssdid==`rssdid' & period=="`period_filter'", meanonly
            local y_noDF = r(mean)

            qui sum `yvar_0' if rssdid==`rssdid' & period=="`period_filter'", meanonly
            local y_0 = r(mean)

            qui sum `yvar_1' if rssdid==`rssdid' & period=="`period_filter'", meanonly
            local y_1 = r(mean)

            * Vertical extent of the box: the three measures plus a buffer.
            * Kept in their own locals so the data-range locals y_min and y_max
            * computed above are not overwritten.
            local box_y_min = min(`y_noDF', `y_0', `y_1') - `box_height_buffer'
            local box_y_max = max(`y_noDF', `y_0', `y_1') + `box_height_buffer'

            * Calculate the left and right x-coordinates for the box
            local x_left = `x_coord' - `box_width_half'
            local x_right = `x_coord' + `box_width_half'

            * Add the four sides of the box as separate line plots
            local box_lines "`box_lines' (scatteri `box_y_max' `x_left' `box_y_max' `x_right', recast(line) lc(`lcolorstyle'))" // Top line
            local box_lines "`box_lines' (scatteri `box_y_min' `x_left' `box_y_min' `x_right', recast(line) lc(`lcolorstyle'))" // Bottom line
            local box_lines "`box_lines' (scatteri `box_y_max' `x_left' `box_y_min' `x_left', recast(line) lc(`lcolorstyle'))" // Left line
            local box_lines "`box_lines' (scatteri `box_y_max' `x_right' `box_y_min' `x_right', recast(line) lc(`lcolorstyle'))" // Right line
            local n_box_plots = `n_box_plots' + 4

            * Determine initial label position (default: above the box) - use dynamic spacing
            local label_spacing = max(1.5, `y_range' * 0.08)
            local label_y = `box_y_max' + `label_spacing'
            local label_above = 1  // Flag: 1 = above, 0 = below

            * Check if label would conflict with legend area
            if (`x_coord' >= `legend_left_boundary' & `x_coord' <= `legend_right_boundary' & ///
                `label_y' >= `legend_bottom_boundary' & `label_y' <= `legend_top_boundary') {
                local label_y = `box_y_min' - `label_spacing'  // Move to below the box
                local label_above = 0
            }

            * Check for overlaps with existing labels
            local overlap_found = 0
            local num_existing = 0
            foreach x_pos in `label_x_positions' {
                local num_existing = `num_existing' + 1
                local existing_y : word `num_existing' of `label_y_positions'

                * Check if labels would overlap (within minimum separation distance)
                if (abs(`x_coord' - `x_pos') < `min_label_separation' & ///
                    abs(`label_y' - `existing_y') < `label_spacing' * 1.5) {
                    local overlap_found = 1
                    * One overlap is enough; leave the inner loop
                    continue, break
                }
            }

            * If overlap found and currently above, try below
            if `overlap_found' & `label_above' {
                local label_y = `box_y_min' - `label_spacing'
                local label_above = 0

                * Check again for overlaps in the new position. If the label
                * still overlaps it is left below the box; there is no further
                * fallback position.
                local overlap_found = 0
                local num_existing = 0
                foreach x_pos in `label_x_positions' {
                    local num_existing = `num_existing' + 1
                    local existing_y : word `num_existing' of `label_y_positions'

                    if (abs(`x_coord' - `x_pos') < `min_label_separation' & ///
                        abs(`label_y' - `existing_y') < `label_spacing' * 1.5) {
                        local overlap_found = 1
                        continue, break
                    }
                }
            }

            * Store the final label position for future overlap checks
            local label_x_positions "`label_x_positions' `x_coord'"
            local label_y_positions "`label_y_positions' `label_y'"

            * Append the text option for this bank. Compound double quotes are
            * used because the stored text itself contains a quoted bank name.
            local box_labels `"`box_labels' text(`label_y' `x_coord' "`bank_name'", color(black))"'
        }
    }

    * Legend keys refer to the three highlighted-bank scatters, which are the
    * 4th to 6th plots after the box lines. With all three banks present these
    * evaluate to 16, 17 and 18.
    local leg_v1 = `n_box_plots' + 4
    local leg_v0 = `n_box_plots' + 5
    local leg_noDF = `n_box_plots' + 6

        * Generate the scatter plot with automatically calculated boxes and labels
            twoway `box_lines' /// Add the box lines generated above
                    (scatter `yvar_1' `xvar' if period=="`period_filter'" & assets_bill>=100 & !inlist(rssdid,`highlight_rssdids') , msize(2) msymbol(O) mcolor(sand%30) lcolor(sand%30)) /// A-D + DF_I + DF_U (excluding highlighted banks)
                    (scatter `yvar_0' `xvar' if period=="`period_filter'" & assets_bill>=100 & !inlist(rssdid,`highlight_rssdids') , msize(2) msymbol(T) mcolor(midblue%30) lcolor(midblue%30)) /// A-D + DF_I (excluding highlighted banks)
                    (scatter `yvar_noDF' `xvar' if period=="`period_filter'" & assets_bill>=100 & !inlist(rssdid,`highlight_rssdids') , msize(2) msymbol(S) mcolor(red%30) lcolor(red%30) xtitle("Uninsured %") ytitle("Bank value")) /// A-D (excluding highlighted banks)
                    (scatter `yvar_1' `xvar' if period=="`period_filter'" & inlist(rssdid,`highlight_rssdids'),mlabsize(3) msize(2) mcolor(sand) msymbol(O) mlabc(sand) ) /// A-D + DF_I + DF_U (highlighted banks), legend key leg_v1
                    (scatter `yvar_0' `xvar' if period=="`period_filter'" & inlist(rssdid,`highlight_rssdids'), mlabsize(3) msize(2) mcolor(midblue) msymbol(T) mlabc(midblue) )  /// A-D + DF_I (highlighted banks), legend key leg_v0
                    (scatter `yvar_noDF' `xvar' if period=="`period_filter'" & inlist(rssdid,`highlight_rssdids'), mlabsize(3) msize(2) mcolor(red) msymbol(S) mlabc(red) ), /// A-D (highlighted banks), legend key leg_noDF
                    /* Graph Options */ ///
                    yscale(r(`y_axis_min' `y_axis_max')) /* Dynamic Y-axis range */ ///
                    ylabel(`y_axis_min'(`y_increment')`y_axis_max',grid) /* Dynamic Y-axis labels and grid lines */ ///
                    graphregion(color(white)) /* Graph region background color */ ///
                    xscale(r(0.2 1)) /* X-axis scale */ ///
                    xlabel(0.2(0.2)1) /* X-axis labels */ ///
                    xsize(7) /* Graph width in inches */ ///
                    plotregion(lcolor(black)) /* Plot region border color */ ///
                    yline(0, lc(black) lp(dash)) /* Add a dashed line at y=0 */ ///
                    legend(order(`leg_noDF' "A {&minus} D " `leg_v0' "A {&minus} D + DF{subscript:I}" `leg_v1' "A {&minus} D + DF{subscript:I} + DF{subscript:U}") pos(2) col(1) ring(0)) ///
                    scheme(s1color) /* Plot scheme */ ///
                    `box_labels' // Add the bank labels generated above

    **===============================================================================
    * Export Figure
    **===============================================================================
    * Purpose: Save the plot with the filename specified by global variable

    * Determine output filename based on context (main analysis or extension).
    * A non-empty ext_suffix global selects the extension filename for every
    * period; otherwise the period-specific main-analysis filename is used.
    local output_filename ""
    if "${ext_suffix}" != "" {
        local output_filename "${fig_scatter_annotated_ext}"
    }
    else if "`suffix'" == "dec2021" {
        local output_filename "${fig_scatter_annotated_dec}"
    }
    else if "`suffix'" == "feb2023" {
        local output_filename "${fig_scatter_annotated_feb}"
    }

    * Skip the export rather than write a file named .pdf when the relevant
    * filename global is empty or undefined
    if "`output_filename'" == "" {
        display as error "Could not determine output filename for suffix: `suffix' and ext_suffix: ${ext_suffix}"
        continue
    }

    graph export "$path_figures/`output_filename'.pdf", as(pdf) fontface("Georgia") replace

    display "--- Plot for `title' completed ---"
    display "--- Output saved as: `output_filename'.pdf ---"

}

display "--- Scatter Plot Bank Values with Annotations completed ---"
